{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "df = pd.read_csv(\n",
    "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
    "    header=None)\n",
    "df.columns = [\n",
    "    \"Age\", \"WorkClass\", \"fnlwgt\", \"Education\", \"EducationNum\",\n",
    "    \"MaritalStatus\", \"Occupation\", \"Relationship\", \"Race\", \"Gender\",\n",
    "    \"CapitalGain\", \"CapitalLoss\", \"HoursPerWeek\", \"NativeCountry\", \"Income\"\n",
    "]\n",
    "# df = df.sample(frac=0.1, random_state=1)\n",
    "train_cols = df.columns[0:-1]\n",
    "label = df.columns[-1]\n",
    "X = df[train_cols]\n",
    "y = df[label].apply(lambda x: 0 if x == \" <=50K\" else 1) #Turning response into 0 and 1\n",
    "\n",
    "seed = 42\n",
    "np.random.seed(seed)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret import show\n",
    "from interpret.data import ClassHistogram\n",
    "\n",
    "hist = ClassHistogram().explain_data(X_train, y_train, name = 'Train Data')\n",
    "show(hist)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training multiple EBM models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.glassbox import ExplainableBoostingClassifier\n",
    "\n",
    "# Fitting multiple EBM models with different training datasets and random seeds\n",
    "seed =1\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)\n",
    "ebm1 = ExplainableBoostingClassifier(random_state=seed, n_jobs=-1)\n",
    "\n",
    "ebm1.fit(X_train, y_train)  \n",
    "\n",
    "seed +=10\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)\n",
    "\n",
    "ebm2 = ExplainableBoostingClassifier(random_state=seed, n_jobs=-1)\n",
    "ebm2.fit(X_train, y_train)  \n",
    "\n",
    "seed +=10\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)\n",
    "\n",
    "ebm3 = ExplainableBoostingClassifier(random_state=seed, n_jobs=-1)\n",
    "ebm3.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merging multiple trained EBM models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Merging multiple EBM models\n",
    "\n",
    "from interpret.glassbox.ebm.utils import *\n",
    "from interpret import show\n",
    "\n",
    "models = [ebm1, ebm2 , ebm3]\n",
    "merged_ebm = merge_ebms(models=models)\n",
    "\n",
    "ebm_global = merged_ebm.explain_global(name='EBM')\n",
    "show(ebm_global)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}